Welcome to this blog post on performing trajectory inference using the Slingshot package in R. Here, we’ll walk through a full pipeline from preparing your data to visualizing inferred pseudotime and gene dynamics.
library(Seurat)
library(SingleCellExperiment)
library(ggplot2)
library(dplyr)
SedatFunctions::rds_function(8)
# Assuming you have a Seurat object named 'seurat_obj'
sce_obj <- as.SingleCellExperiment(Endo_sickle)
sce_obj <- sce_obj[rowSums(assay(sce_obj, "counts")) > 0, ]
library(slingshot)
pto <- slingshot(reducedDim(sce_obj,'UMAP'), clusterLabels = sce_obj$seurat_clusters,
start.clus = 3, end.clus = c(4, 5), spar = 1.1)
slsMST <- slingMST(pto, as.df = TRUE)
scater::plotReducedDim(sce_obj, dimred = 'UMAP', colour_by = 'seurat_clusters') +
ggplot2::geom_point(data = slsMST, aes(x = umap_1, y = umap_2), size = 3, color = "grey15") +
ggplot2::geom_path(data = dplyr::arrange(slsMST, Order),
ggplot2::aes(x = umap_1, y = umap_2, group = Lineage)) +
ggplot2::labs(title = "Slingshot MST on UMAP", x = "UMAP 1", y = "UMAP 2")
hvg_genes <- VariableFeatures(Endo_sickle)
rowData(sce_obj)$highly_variable <- rownames(sce_obj) %in% hvg_genes
sce_obj <- sce_obj[rowSums(assay(sce_obj, "counts")) > 0, ]
scater::plotReducedDim(sce_obj, 'UMAP', colour_by = 'seurat_clusters', text_by = 'seurat_clusters') + theme_bw()
emb <- slingshot::embedCurves(pto, reducedDim(sce_obj,'UMAP'))
emb <- slingshot::slingCurves(emb)
g <- scater::plotReducedDim(sce_obj, 'UMAP', text_by = 'seurat_clusters', colour_by='seurat_clusters')
for (path in emb) {
embedded_curves.df <- data.frame(path$s[path$ord,])
g <- g + ggplot2::geom_path(data=embedded_curves.df, aes(x=umap_1, y=umap_2), linewidth=1.2)
}
print(g)
sce_obj <- scater::runUMAP( sce_obj, dimred = 'PCA',
BPPARAM=BiocParallel::MulticoreParam(6),
ncomponents = 3, name = 'UMAP_3D'
)
#3
embedded <- slingshot::embedCurves(sce.sling, "UMAP")
embedded <- slingshot::slingCurves(embedded)
#4
g <- scater::plotReducedDim(sce.sling, 'UMAP', text_by = 'seurat_clusters', colour_by='seurat_clusters')
for (path in embedded) {
embedded_curves.df <- data.frame(path$s[path$ord,])
g <- g + ggplot2::geom_path(data=embedded_curves.df, aes(x=umap_1, y=umap_2), linewidth=1.2)
}
print(g)
#5
sce.sling$Pseudotime <- rowMeans(slingshot::slingPseudotime(sce.sling), na.rm=TRUE)
#6
g <- scater::plotReducedDim(sce.sling, 'UMAP', text_by = 'seurat_clusters', colour_by='Pseudotime')
for (path in embedded) {
embedded_curves.df <- data.frame(path$s[path$ord,])
g <- g + ggplot2::geom_path(data=embedded_curves.df, aes(x=umap_1, y=umap_2), linewidth=1.2)
}
print(g)
#7
embedded <- embedCurves(sce.sling, 'UMAP_3D')
library(dplyr)
library(plotly)
?reducedDim
plot.df <- as.data.frame(reducedDim( sce.sling, 'UMAP_3D' ) )
plot.df$cluster <- sce.sling[['seurat_clusters']]
colnames(plot.df) <- c('UMAP1', 'UMAP2', 'UMAP3', 'cluster')
redDim <- 'UMAP'
p <- plot_ly( ) %>% # colors = trace.colors ) %>%
add_trace( data = plot.df,
x = ~UMAP1, y = ~UMAP2, z = ~UMAP3,
color = ~cluster,
type = 'scatter3d',
mode = 'markers',
marker = list( size = 3, opacity = 1 )
)
for (i in 1:4) {
embedded_curves.tmp <- slingCurves(embedded)[[i]] # only 1 path
embedded_curves.tmp.df <- data.frame(embedded_curves.tmp$s[embedded_curves.tmp$ord,])
p <- p %>%
add_trace( data = embedded_curves.tmp.df,
x = ~UMAP1, y = ~UMAP2, z = ~UMAP3,
type = 'scatter3d', mode = "lines",
name = paste0("Lineage ", i),
line = list( width = 5, opacity = 1 )
)
}
p %>%
layout( scene = list( xaxis = list(title = paste0( redDim, '1') ),
yaxis = list(title = paste0( redDim, '2') ),
zaxis = list(title = paste0( redDim, '3') )
)
)
plot.df$Pseudotime <- sce.sling$Pseudotime
p <- plot_ly( ) %>% # colors = trace.colors ) %>%
add_trace( data = plot.df,
x = ~UMAP1, y = ~UMAP2, z = ~UMAP3,
type = 'scatter3d',
mode = 'markers',
showlegend = FALSE,
marker = list( color = ~Pseudotime,
size = 3,
colorscale = 'Viridis', # Viridis color scale
colorbar = list( # Add color scale bar,
title = 'Pseudotime',
x = 1.01, # horizontal position
y = 0.4, # vertical position
len = 0.8, # length (0 to 1, relative to plot height)
thickness = 30 # width in pixels
),
opacity = 1
)
)
for (i in 1:4) {
embedded_curves.tmp <- slingCurves(embedded)[[i]] # only 1 path
embedded_curves.tmp.df <- data.frame(embedded_curves.tmp$s[embedded_curves.tmp$ord,])
p <- p %>%
add_trace( data = embedded_curves.tmp.df,
x = ~UMAP1, y = ~UMAP2, z = ~UMAP3,
type = 'scatter3d', mode = "lines",
name = paste0("Lineage ", i),
line = list( width = 5, opacity = 1 )
)
}
p %>%
layout( scene = list( xaxis = list(title = paste0( redDim, '1') ),
yaxis = list(title = paste0( redDim, '2') ),
zaxis = list(title = paste0( redDim, '3') )
)
)
sce.sling <- scater::logNormCounts(sce.sling, assay.type="logcounts")
sce.sling
assayNames(sce.sling)
library(TSCAN)
# scran::testPseudotime(...)
pseudo <- TSCAN::testPseudotime( sce.sling,
pseudotime=sce.sling$Pseudotime,
BPPARAM = BiocParallel::MulticoreParam(6)
)
library(dplyr)
# ensembldb::filter(., FDR < 0.05)
pseudo.sig <- pseudo %>%
as.data.frame() %>%
dplyr::filter( FDR < 0.05 ) %>%
arrange( FDR )
topgenes <- rownames(pseudo.sig)[1:50]
scater::plotHeatmap(sce.sling,
order_columns_by='Pseudotime',
colour_columns_by="seurat_clusters",
features = topgenes,
center=TRUE,
scale=TRUE)
# Example for one gene (e.g., topgenes[1])
gene <- topgenes[39]
df <- data.frame(
pseudotime = sce.sling$Pseudotime,
expression = logcounts(sce.sling)[gene, ],
cluster = sce.sling$seurat_clusters
)
ggplot2::ggplot(df, aes(x = pseudotime, y = expression, color = cluster)) +
ggplot2::geom_point(alpha = 0.5) +
ggplot2::geom_smooth(method = "loess", se = FALSE) +
ggplot2::labs(title = gene, y = "Log-normalized expression")
ggplot2::ggplot(df, aes(x = pseudotime, y = expression, color = cluster)) +
ggplot2::geom_smooth(method = "loess", se = FALSE, size = 1.2) +
ggplot2::labs(title = gene, y = "Log-normalized expression")
message("Here is top genes:")
## Here is top genes:
topgenes
## [1] "Nckap5" "Calcrl" "Gm31243" "Bank1"
## [5] "Pax5" "Adgrl3" "Adgre5" "Fendrr"
## [9] "Dennd4a" "Ptprb" "Slfn14" "Tmem100"
## [13] "Slc4a1" "BE692007" "Blnk" "Tspan7"
## [17] "Slc16a10" "Hmbs" "Pim1" "Ldb2"
## [21] "Ikzf3" "Ptprm" "Hmcn1" "Tmem131l"
## [25] "Vwf" "Pde3a" "Epas1" "Btla"
## [29] "5430401H09Rik" "Rgs6" "Kit" "Aff3"
## [33] "Prickle2" "Arl15" "Gpx1" "Blvrb"
## [37] "Prkcb" "Bmpr2" "Ube2o" "Bmp6"
## [41] "Tbcel" "Trim10" "Cdk14" "Tent5c"
## [45] "Eif2ak1" "Ppp1r15a" "Creg1" "Tnfaip2"
## [49] "Hpgd" "Gypa"
scater::plotReducedDim(sce.sling, dimred = "UMAP", colour_by = topgenes[39])
That’s a wrap! You’ve now built a full Slingshot pipeline, from dimensionality reduction and clustering to trajectory inference, pseudotime visualization, and gene trend analysis.
Happy analyzing!